import { PlaywrightCrawler, ProxyConfiguration, createPlaywrightRouter } from 'crawlee';

// Seed URLs handed to the crawler when it is started.
const startUrls = [
    'https://crawlee.dev',
];

// Router on which the request handlers for this crawl are registered.
const router = createPlaywrightRouter();

/**
 * Default route handler.
 *
 * Logs a progress message, then enqueues links that match the crawlee.dev
 * glob, tagging each enqueued request with the 'detail' label.
 */
router.addDefaultHandler(async (context) => {
    const { enqueueLinks, log } = context;

    log.info(`将新URL排入队列`);

    // Only same-site links are followed; the label selects the handler
    // registered under 'detail'.
    const detailLinkOptions = {
        globs: ['https://crawlee.dev/**'],
        label: 'detail',
    };
    await enqueueLinks(detailLinkOptions);
});

/**
 * Handler for requests labelled 'detail'.
 *
 * Reads the page title, logs it together with the loaded URL, and stores a
 * `{ url, title }` record via `pushData`.
 */
router.addHandler('detail', async (context) => {
    const { request, page, log, pushData } = context;

    const title = await page.title();
    // The loaded URL is used for both the log entry and the stored record.
    const url = request.loadedUrl;

    log.info(`${title}`, { url });

    await pushData({ url, title });
});

const crawler = new PlaywrightCrawler({
    // Every request is dispatched through the router and its registered handlers.
    requestHandler: router,

    // Upper bound on the number of requests processed in a single crawl.
    maxRequestsPerCrawl: 20,

    // Run with a visible browser window instead of headless mode.
    headless: false,

    // To crawl through proxies, add an option such as:
    //   proxyConfiguration: new ProxyConfiguration({ proxyUrls: ['...'] }),
});

// Add the start URLs to the queue and begin crawling.
await crawler.run(startUrls);
